
**********************************************************************************************
* Code Name: Part3_Income_process.sas
* Author: Thomas Jansson, Sveriges Riksbank
* Created: March 15, 2021
* Updated: June 7, 2023 (June 7, 2021)
* Goal: Use input data from LINDA to create dataset for the estimation of the income process;
**********************************************************************************************;

**********************************************************************************************
* Introductory steps 
**********************************************************************************************;

* Government student aid from CSN, in SEK (subsidy for one semester), one global macro
  variable per year. Resolved later as maxstud followed by the year;
%let maxstud1993 = 8391;
%let maxstud1994 = 8586;
%let maxstud1995 = 8707;
%let maxstud1996 = 8829;
%let maxstud1997 = 8854;
%let maxstud1998 = 8878;
%let maxstud1999 = 8878;
%let maxstud2000 = 8927;
%let maxstud2001 = 9000;
%let maxstud2002 = 11440;
%let maxstud2003 = 11640;
%let maxstud2004 = 11860;
%let maxstud2005 = 11880;
%let maxstud2006 = 11980;
%let maxstud2007 = 12640;

* Read the semicolon-delimited key that maps the old education classification (SUN 69)
  to the new one (SUN 2000) into the work dataset edu_key. The first row supplies the
  variable names and up to 2000 rows are scanned to decide variable types and lengths;
proc import
		datafile = "\\micro.intra\projekt\P0459$\P0459_gem\SAS DATA\Keys\nyckel-sun-69-old-sun-2000.csv"
		out      = edu_key
		dbms     = dlm
		replace;
	getnames     = yes;
	delimiter    = ';';
	guessingrows = 2000;
run;

* Store the imported key permanently, replacing sun2000niva with a 3-character copy called sunniva.
  NOTE(review): if PROC IMPORT guessed sun2000niva as numeric, the implicit conversion is
  right-aligned and the 3-character copy may come out blank - confirm the imported type;
data tempinc.edu_key (drop = sun2000niva);
	set edu_key;
	length sunniva $3;
	sunniva = sun2000niva;
run;

**********************************************************************************************
* STEP 1
* Construct variables needed for the income process for years 1993 to 2007. Use original LINDA.
**********************************************************************************************;

/*--------------------------------------------------------------------------------------------
  m0bbbab(f_or_i)

  For every year 1993-2007 this macro reads original.linda<f_or_i><year> and writes the work
  dataset char<f_or_i><year>. Each output row carries renamed and labelled income variables,
  two alternative sets of occupation-status dummies, a government-sector dummy, education
  codes and family-role dummies.

  Parameter
    f_or_i   Suffix inserted into the input and output dataset names (called with f below).

  Relies on the global macro variables maxstud1993 ... maxstud2007 defined at the top of the
  program.
--------------------------------------------------------------------------------------------*/
%macro m0bbbab(f_or_i);
%local I;	/* keep the loop index private so a global macro variable I is never overwritten */
%do I = 1993 %to 2007;

data char&f_or_i&I;
	set original.linda&f_or_i&I (keep = nybidnr nybidnrf bobjtyp bantf burvkodf bald bant bkon BKUNGR 
			BKUINST BLKFNOV BCIV BFAMST tstudl istudsfl tarbstl tforpl ppensspl ppenssfl
			cdispl cdisp cdisplh cprim CSFVI tfoab kkap nrv barbink kiranta uater 
			CNEGL CTRAPSFL CTRAPSPL CFAKTL cbrutto sslutex pkuslsf
			TARESE TADUBB TATJR TAKOST TKERS IFAML PPENSSFL ISTUD COVRNEGL IVPLTOTL
			%if &I =1993 %then %do; IVPL %end; %else %do; IVPLTOT %end;
			%if &I <=1999 %then %do; bsun %end; %else %do; bsuninr bsunniv %end;
			%if &I <=2004 %then %do; kv kf %end; %else %do; cdisp04 kvbrut kfbrut kv kf %end;);
	    
	* Subsetting IF: keep rows whose object type is neither 3 nor 5 nor missing, whose family
	  status is non-blank and whose household disposable income is non-zero (Drop Death Estates);
	if bobjtyp ^= 3 and bobjtyp ^= 5 and bobjtyp NE . and BFAMST NE ' ' and cdisplh NE 0;

	id = nybidnr; 										* Individual id;
		label id = 'Individual id';
	idhh = nybidnrf ; 									* Household id;
		label idhh = 'Household id';
	year = &I;											* Year;
		label year = 'Year';
	age = bald;											* Age;
		label age = 'Age';
	hh_size = bant;										* Household size;
		label hh_size = 'Household size';
	sex = bkon;											* Gender (1=man, 2=woman);
		label sex = 'Gender (1=man, 2=woman)';
	pension = sum(of ppensspl ppenssfl);				* Pension;
		label pension = 'Pension';
	studentaid  = sum(of tstudl istudsfl);				* Student Aid;
		label studentaid = 'Student Aid';
	unemploybenefits = tarbstl;							* Unemployment benefits;
		label unemploybenefits = 'Unemployment benefits';
	parentalinc = tforpl;								* Parental leave income;
		label parentalinc = 'Parental leave income';

	* Positive non-taxable government transfers with 0 as minimum. The floor is applied BEFORE
	  the copy so that pos_transfers actually receives the floored value. A missing CTRAPSFL is
	  also set to 0 here because SAS orders missing values below every number;
	if CTRAPSFL<0 then CTRAPSFL = 0;
	pos_transfers = CTRAPSFL;
		label pos_transfers = 'Pos non taxable gov transfers';

	if BCIV = 02 or BCIV = 03 or BCIV = 07 or BCIV = 12 or BCIV = 17 then married = 1; else married = 0; * Married (dummy);
		label married = 'Married (dummy)';	
	familyrelation = substr(BFAMST,1,1);				* Role in the family is the first character of BFAMST;
		label familyrelation = 'Role in the family (1=Partner,2=Single parent,3=Child,4=Single,0=Others)';	
	inc = cdispl;										* Disposable income;
		label inc = 'Disposable income';
	inchh = cdisplh;									* Disposable household income (by LINDA);
		label inchh = 'Disposable income HH (by LINDA)';	
	cap_inc = kkap;										* Capital income;
		label cap_inc = 'Capital income';
	inc_earned = cprim;									* CPRIM = primary income;
		label inc_earned = 'Income earned';
	inc_broad = csfvi;									* Broad labor income measure;
		label inc_broad = 'Income broad';
	enterpr_inc = nrv;  								* NRV = income from business activity (self-employment inc);
		label enterpr_inc = 'Self empl inc';
	closely_held_inc = tfoab;							* TFOAB = income from closely held companies reported as income (special case of self-employment inc);
 		label closely_held_inc = 'Limited firm inc'; 
	inc_type = barbink;									* Type of income;
		label inc_type = 'Type of inc';
	region = BLKFNOV;									* Region code;	
		label region = 'Region code';
	avdrag = TARESE+TADUBB+max((TATJR+TAKOST-TKERS),0); * Deductions (the third term is floored at 0);
		label avdrag = 'Deductions';

	* BKUNGR and BKUINST are handled as numeric for 1996-1997 in the f files and as character
	  otherwise, so the two branches convert in opposite directions to give the same types;
	%if &I>1995 and &I<1998 and &f_or_i=f %then %do;
	sni_code = BKUNGR;									* Industrial code (SNI);
	BKUNGR_temp = put(BKUNGR,5.);
	sni_2 = substr(BKUNGR_temp,1,2);					* First two characters of the 5-wide SNI code;
	inst_code = put(BKUINST,4.);						* Type of institution;
		drop BKUNGR_temp;
	%end;	
	%else %do;
	sni_code = input(BKUNGR,best5.); 					* Industrial code (SNI);
	sni_2 = substr(BKUNGR,1,2);							* First two characters of the SNI code;
	inst_code = BKUINST;								* Type of institution;
	%end;
		label sni_code = 'Industrial code (SNI)';
		label inst_code ='Type of institution';
	nSampled_inHHf = bantf;								* Number of sampled individuals in HH (regular LINDA);
		label nSampled_inHHf = 'Number of sampled in HH';
	sampled_f = burvkodf;								* Sampled individual (regular LINDA);
		label sampled_f = 'Sampled';

	disp_nocap = cdispl-max(kkap,0);					* Disposable income excl. capital income;
	disp_nocap_new = cdisp-kiranta-max((kv-kf),0);		* Disposable income excl. interest payments and net capital gains;
	kkap_new = kiranta-max((kv-kf),0);					* Interest payments excl. net capital gains;

	%if &I >= 2005 %then %do;
	disp_nocap_new04 = cdisp04-kiranta-kvbrut+kfbrut; 	* Disposable income excl. interest payments and net capital gains (new variable definitions after 2004);
	kkap_new04 = kiranta-kvbrut+kfbrut;					* Interest payments excl. net capital gains (new variable definitions after 2004);
	%end;
	%else %do;
	disp_nocap_new04 = disp_nocap_new;					* Note. Use old definition for 2004 and before;
	kkap_new04 = kkap_new;								
	%end;

		%if &I = 1993 %then %do; 
		* Adjust for the fact that the income measures in 1993 are reported in 100s of Swedish kronor (SEK);
		studentaid = studentaid*100;
		unemploybenefits = unemploybenefits*100;
		parentalinc = parentalinc*100;
		pension = pension*100;

		inc = inc*100;
		inchh = inchh*100;
		cap_inc = cap_inc*100;
		inc_earned = inc_earned*100;
		inc_broad = inc_broad*100;
		enterpr_inc = enterpr_inc*100;
		closely_held_inc = closely_held_inc*100;
		pos_transfers = pos_transfers*100;

		disp_nocap = disp_nocap*100;
		disp_nocap_new = disp_nocap_new*100;
		disp_nocap_new04 = disp_nocap_new04*100;

		kkap_new = kkap_new*100;
		kkap_new04 = kkap_new04*100;
		avdrag = avdrag*100;
		CTRAPSFL = CTRAPSFL*100;
		%end;

	* Occupation status, first definition. Exactly one of the five dummies is set to 1 and the
	  branches are tested in this priority order: Unemployed, Retired, Student, Employed, Unknown;
	if (unemploybenefits > 0) then do;
		unemployed = 1;
		retired = 0;
		student = 0;
		employed = 0;
		unknown_stat = 0;
	end;
	else if (pension > (inc_earned + closely_held_inc + parentalinc)) then do;
		unemployed = 0;
		retired = 1;
		student = 0;
		employed = 0;
		unknown_stat = 0;
	end;
	* Student when aid exceeds 95 percent of the one-semester CSN amount for the year;
	else if (studentaid > (.95*&&maxstud&I)) then do;
		unemployed = 0;
		retired = 0;
		student = 1;
		employed = 0;
		unknown_stat = 0;
	end;
	else if ((inc_earned + closely_held_inc + parentalinc) > 0) then do;
		unemployed = 0;
		retired = 0;
		student = 0;
		employed = 1;
		unknown_stat = 0;
	end;
	else do;
		unemployed = 0;
		retired = 0;
		student = 0;
		employed = 0;
		unknown_stat = 1;
	end;
		label unemployed = 'Unemployed (dummy)';
		label retired = 'Retired (dummy)';
		label student = 'Student (dummy)';
		label employed = 'Employed (dummy)';
		label unknown_stat = 'Unknown status (dummy)';


	* Occupation status, alternative definition. Same priority order, but every test is simply
	  whether the corresponding income component is strictly positive;
	if (unemploybenefits > 0) then do;
		unemployed_alt = 1;
		retired_alt = 0;
		student_alt = 0;
		employed_alt = 0;
		unknown_stat_alt = 0;
	end;
	else if (pension > 0) then do;
		unemployed_alt = 0;
		retired_alt = 1;
		student_alt = 0;
		employed_alt = 0;
		unknown_stat_alt = 0;
	end;
	else if (studentaid > 0) then do;
		unemployed_alt = 0;
		retired_alt = 0;
		student_alt = 1;
		employed_alt = 0;
		unknown_stat_alt = 0;
	end;
	else if (inc_earned > 0) then do;
		unemployed_alt = 0;
		retired_alt = 0;
		student_alt = 0;
		employed_alt = 1;
		unknown_stat_alt = 0;
	end;
	else do;
		unemployed_alt = 0;
		retired_alt = 0;
		student_alt = 0;
		employed_alt = 0;
		unknown_stat_alt = 1;
	end;
		label unemployed_alt = 'Unemployed (dummy)';
		label retired_alt = 'Retired (dummy)';
		label student_alt = 'Student (dummy)';
		label employed_alt = 'Employed (dummy)';
		label unknown_stat_alt = 'Unknown status (dummy)';

	* Dummy for working in the government sector. The relevant character of inst_code and the
	  set of values that count as government differ between 1993-1998, 1999-2001 and 2002-2007;
	bkuinst3 = substr(inst_code,3,1);
	bkuinst4 = substr(inst_code,4,1);
		%if &I <= 1998 %then %do;
			if bkuinst3 in ('1','2') then govsector = 1; 
			else govsector = 0;		
		%end;
		%if &I >= 1999 and &I <= 2001 %then %do;
			if bkuinst4 in ('1','2') then govsector = 1; 
			else govsector = 0;		
		%end;
		%if &I >= 2002 %then %do;
			if bkuinst4 in ('1','2','3') then govsector = 1;
			else govsector = 0;	
		%end;
		label govsector = 'Work in government sector (dummy)';

	* Education codes. Up to 1999 only the old code (bsun) is available, from 2000 the
	  orientation and level codes (bsuninr and bsunniv) are used instead;
	%if &I <= 1999 %then %do;
		%if &I = 1996 %then %do;
		* In 1996 bsun is treated as numeric. Adding 100000 and cutting characters 2-6 of the
		  6-wide text gives a zero-padded 5-character code, which is then read back as a number;
		bsunUP = bsun+100000;
		bsunadj = put(bsunUP,6.);
		bsunadj2 = substr(bsunadj,2,5);
		edu_old_code = input(bsunadj2,best5.);
		drop bsunUP bsunadj bsunadj2;
		%end;
		%else %do;
		edu_old_code = input(bsun,best5.);				* Education old code;
		%end;	
		eduorient_code = '    ';						* Education orientation (major) code, blank for these years;			
		edulevel_code = '   ';							* Education level code, blank for these years;
	%end;
	%else %do;
		edu_old_code = 0;								* Education old code, set to 0 for these years;
		eduorient_code = bsuninr;						* Education orientation (major) code;
		edulevel_code = bsunniv;						* Education level code;
	%end;
		label edu_old_code = 'Edu old code';
		label eduorient_code = 'Edu orientation code';
		label edulevel_code = 'Edu level code';

	* Role in family dummies (familyrelation: 1=Partner,2=Single parent,3=Child,4=Single,0=Others);
	partnerdummy = (familyrelation = '1');
		label partnerdummy = 'Partner';
	singleparent = (familyrelation = '2');
		label singleparent = 'Single parent'; 
	child = (familyrelation = '3');
		label child = 'Child in hh';
	over18 = (age >= 18);
		label over18 = '18 or older';
	
run;

%end;
%mend;
%m0bbbab(f);


**********************************************************************************************
* STEP 2
* Construct two education variables: edulev and eduorient. 
* Identify household heads (year by year).
**********************************************************************************************;

/* Order the SUN key by its lookup variable so that it can take part in the BY-merge below. */
proc sort data = tempinc.edu_key;
	by sun;
run;

/*--------------------------------------------------------------------------------------------
  m1d(f_or_i)

  Turns the work datasets char<f_or_i><year> into tempinc.incprocess<f_or_i><year> and
  tempinc.hhead<f_or_i><year> for 1993-2007:
    1. adds edulev, eduorient and suninr3 (via key tables for 1993-1999, directly from the
       level and orientation codes for 2000-2007),
    2. flags one household head per household and year (hhead_y),
    3. drops the raw input variables.

  Parameter
    f_or_i   Suffix used in every dataset name (called with f below).
--------------------------------------------------------------------------------------------*/
%macro m1d(f_or_i);

/* ---- 1993-1999: education variables are looked up through the old education code ---- */
%do y = 1993 %to 1999;

proc sort data = char&f_or_i&y;
	by edu_old_code;
run;

data linda&f_or_i&y;
	merge char&f_or_i&y     (in = from_char)
	      keys.edu99_key    (rename = (bsun = edu_old_code))
	      tempinc.edu_key   (rename = (sun  = edu_old_code));
	by edu_old_code;

	* Keep only rows that exist in the yearly data, key-only rows are discarded;
	if from_char;

	* Missing education variables are set to 9;
	if missing(edulev)    then edulev    = '9';
	if missing(eduorient) then eduorient = '9';

	suninr3 = substr(SUN2000Inr,1,3);

	label edulev    = 'Education level'
	      eduorient = 'Education orientation'
	      suninr3   = 'Edu orientation 3 digit';
run;

%end;

/* ---- 2000-2007: education variables are cut straight out of the level/orientation codes ---- */
%do y = 2000 %to 2007;

data linda&f_or_i&y;
	* Declared before SET so that these variables come first in the output dataset;
	length id idhh year 8 edulev eduorient $ 2 suninr3 $ 3;
	set char&f_or_i&y;

	edulev    = substr(edulevel_code,1,1);
	eduorient = substr(eduorient_code,1,1);
	suninr3   = substr(eduorient_code,1,3);

	* Missing education variables are set to 9;
	if missing(edulev)    then edulev    = '9';
	if missing(eduorient) then eduorient = '9';

	label edulev    = 'Education level'
	      eduorient = 'Education orientation'
	      suninr3   = 'Edu orientation 3 digit';
run;

%end;

/* ---- All years: flag household heads, drop raw variables, save the head flag separately ---- */
%do y = 1993 %to 2007;

* Within each household the row that sorts last on partnerdummy, singleparent, over18, inc,
  age and id becomes the household head for that year;
proc sort data = linda&f_or_i&y;
	by idhh partnerdummy singleparent over18 inc age id;
run;

data linda&f_or_i&y;
	set linda&f_or_i&y;
	by idhh;
	hhead_y = last.idhh;	* 1 for the last row of the household, 0 otherwise;
run;

* Drop unnecessary variables;
data tempinc.incprocess&f_or_i&y;
	set linda&f_or_i&y;
	drop edu_old_code eduorient_code edulevel_code
	     bobjtyp bantf burvkodf bald bant bkon BKUNGR BCIV BFAMST
	     tstudl istudsfl tarbstl tforpl ppensspl ppenssfl
	     BLKFNOV csfvi cprim cdispl cdisplh bkuinst3 bkuinst4 BKUINST
	     BARBINK nybidnr nybidnrf kkap nrv TFOAB CTRAPSFL
	     %if &y < 2000 %then %do; bsun SUN2000Inr sunniva %end;
	     %else %do; BSUNINR BSUNNIV %end;
	     ;
run;

* One small dataset per year that holds id and the year-specific head flag hhead_y<year>;
data tempinc.hhead&f_or_i&y;
	set tempinc.incprocess&f_or_i&y (keep = id hhead_y rename = (hhead_y = hhead_y&y));
run;

%end;
%mend;
%m1d(f);


**********************************************************************************************
* STEP 3
* Identify household heads (for periods 1993-1998, 1999-2007, and 1993-2007 separately)
**********************************************************************************************;

/*--------------------------------------------------------------------------------------------
  m4ca(f_or_i)

  Stacks the yearly datasets tempinc.incprocess<f_or_i><year> into three panels that contain
  only the variables needed to rank household members:
    tempinc.incprocess9398<f_or_i>   1993-1998
    tempinc.incprocess9907<f_or_i>   1999-2007
    tempinc.incprocess9307<f_or_i>   1993-2007 (the first panel followed by the second)

  Parameter
    f_or_i   Suffix used in every dataset name (called with f below).
--------------------------------------------------------------------------------------------*/
%macro m4ca(f_or_i);

%local rank_vars;
%let rank_vars = year idhh partnerdummy singleparent over18 inc age id;

/* ---- Panel 1993-1998: start from 1993, then append 1994-1998 one year at a time ---- */
%let y = 1993;
data tempinc.incprocess9398&f_or_i;
	set tempinc.incprocess&f_or_i&y (keep = &rank_vars);
run;

%do y = 1994 %to 1998;
	data incapp;
		set tempinc.incprocess&f_or_i&y (keep = &rank_vars);
	run;

	proc append base = tempinc.incprocess9398&f_or_i
	            data = incapp;
	run;
%end;

/* ---- Panel 1999-2007: start from 1999, then append 2000-2007 one year at a time ---- */
%let y = 1999;
data tempinc.incprocess9907&f_or_i;
	set tempinc.incprocess&f_or_i&y (keep = &rank_vars);
run;

%do y = 2000 %to 2007;
	data incapp;
		set tempinc.incprocess&f_or_i&y (keep = &rank_vars);
	run;

	proc append base = tempinc.incprocess9907&f_or_i
	            data = incapp;
	run;
%end;

/* ---- Panel 1993-2007: copy of the first panel with the second panel appended ---- */
data tempinc.incprocess9307&f_or_i;
	set tempinc.incprocess9398&f_or_i;
run;

proc append base = tempinc.incprocess9307&f_or_i
            data = tempinc.incprocess9907&f_or_i;
run;

%mend;
%m4ca(f);

/*--------------------------------------------------------------------------------------------
  m4cb(f_or_i)

  Picks one stable household head per household from the panel tempinc.incprocess<f_or_i>.
  A person is kept as head when
    - no person who was ever head of the household is present in fewer years than the
      household itself is observed,
    - the person was head in more than half of the years the household is observed, and
    - the household is observed in at least 3 years.

  Writes tempinc.heads_final<f_or_i> (idhh, id, head<f_or_i> = 1) and
  tempinc.famwithheads<f_or_i> (idhh, fam_incl<f_or_i> = 1), plus the intermediate tempinc
  datasets heads, sumheads, hhfreq, remhh and sumheads_up with the same suffix.

  Parameter
    f_or_i   Panel suffix, e.g. 9398f. It is also used as the suffix of the created variables.
--------------------------------------------------------------------------------------------*/
%macro m4cb(f_or_i);

* Rank the members inside each household-year. The row that sorts last is the head that year;
proc sort data = tempinc.incprocess&f_or_i;
	by year idhh partnerdummy singleparent over18 inc age id;
run;

data tempinc.heads&f_or_i;
	set tempinc.incprocess&f_or_i (keep = year idhh id);
	by year idhh;
	head = last.idhh;	* 1 for the last row of the household-year, 0 otherwise;
run;

* Per person and household: nheadyear is the number of years as head, while the automatic
  variable _FREQ_ is the number of years the person is observed in the household;
proc sort data = tempinc.heads&f_or_i;
	by id idhh;
run;

proc means data = tempinc.heads&f_or_i noprint;
	by id idhh;
	var head;
	output out = tempinc.sumheads&f_or_i sum(head) = nheadyear;
run;

* Per household: nhhyear is the number of head rows, i.e. the number of years the household
  appears in the data;
proc sort data = tempinc.heads&f_or_i;
	by idhh;
run;

proc means data = tempinc.heads&f_or_i (where = (head = 1)) noprint;
	by idhh;
	var head;
	output out = tempinc.hhfreq&f_or_i (keep = idhh nhhyear) sum(head) = nhhyear;
run;

* Attach the household count to every person-household row;
proc sort data = tempinc.sumheads&f_or_i;
	by idhh;
run;

proc sort data = tempinc.hhfreq&f_or_i;
	by idhh;
run;

data tempinc.sumheads&f_or_i;
	merge tempinc.sumheads&f_or_i
	      tempinc.hhfreq&f_or_i;
	by idhh;
run;

* Households to remove: somebody was head at least one year but is not a member of the
  household in all of its years;
data tempinc.remhh&f_or_i;
	set tempinc.sumheads&f_or_i;
	where nheadyear > 0 and _FREQ_ < nhhyear;
run;

proc sort data = tempinc.remhh&f_or_i nodupkey;
	by idhh;
run;

* Keep persons in the remaining households who were head more than half of the time, and
  only households observed in at least 3 years;
data tempinc.sumheads_up&f_or_i;
	merge tempinc.sumheads&f_or_i (in = in_sum)
	      tempinc.remhh&f_or_i    (in = in_rem keep = idhh);
	by idhh;
	if in_sum and not in_rem and nheadyear > (0.5*nhhyear) and nhhyear >= 3;
run;

* Make sure there are no duplicates at the household level. Removed rows go to work.dups;
proc sort data = tempinc.sumheads_up&f_or_i nodupkey dupout = dups;
	by idhh;
run;

data tempinc.heads_final&f_or_i;
	set tempinc.sumheads_up&f_or_i (keep = idhh id);
	head&f_or_i = 1;
run;

* Make sure there are no duplicates at the individual-household level;
proc sort data = tempinc.heads_final&f_or_i nodupkey;
	by idhh id;
run;

* List of households that have a stable head, one row per household;
proc sort data = tempinc.heads_final&f_or_i out = famwithheads&f_or_i nodupkey;
	by idhh;
run;

data tempinc.famwithheads&f_or_i;
	set famwithheads&f_or_i (keep = idhh);
	fam_incl&f_or_i = 1;
run;

%mend;

* Run it for the three time periods separately;
%m4cb(9398f);
%m4cb(9907f);
%m4cb(9307f);


/*--------------------------------------------------------------------------------------------
  m1ddee(f_or_i)

  Merges the household-head information back onto every yearly dataset
  tempinc.incprocess<f_or_i><year> (1993-2007) and writes grund.incproc230423_<f_or_i><year>:
    head9398 / head9907 / head9307               person is the stable head in that period
    fam_incl9398 / fam_incl9907 / fam_incl9307   household has a stable head in that period
    hhead_y1993 / hhead_y1999                    year-specific head flags of 1993 and 1999

  Parameter
    f_or_i   Suffix used in the dataset names and in the names of the incoming head and
             fam_incl variables (called with f below).

  Side effects: tempinc.incprocess<f_or_i><year> and the two hhead datasets are sorted in
  place with NODUPKEY, so rows with duplicate keys are removed from them.
--------------------------------------------------------------------------------------------*/
%macro m1ddee(f_or_i);
%local y;	/* keep the loop index private so a global macro variable y is never overwritten */

* The 1993 and 1999 head flags are merged onto every year. Sorting them does not depend on
  the loop year, so it is done once here instead of once per iteration;
proc sort data = tempinc.hhead&f_or_i.1993 nodupkey;
	by id;
run;

proc sort data = tempinc.hhead&f_or_i.1999 nodupkey;
	by id;
run;

%do y = 1993 %to 2007;

proc sort data = tempinc.incprocess&f_or_i&y nodupkey;
	by idhh id;
run;

* Merge by individual. The incoming flags carry the period and the macro suffix in their
  names, so the suffix is taken from the parameter rather than being hard-coded;
data tempinc.incprocess2&f_or_i&y;
	merge tempinc.incprocess&f_or_i&y(in = in_inc) tempinc.heads_final9398&f_or_i tempinc.heads_final9907&f_or_i tempinc.heads_final9307&f_or_i;
		by idhh id;
			if in_inc = 1;
				rename head9398&f_or_i = head9398 head9907&f_or_i = head9907 head9307&f_or_i = head9307;
run;

* Merge by household;
data tempinc.incprocess3&f_or_i&y;
	merge tempinc.incprocess2&f_or_i&y(in = in_inc) tempinc.famwithheads9398&f_or_i tempinc.famwithheads9907&f_or_i tempinc.famwithheads9307&f_or_i;
		by idhh;
			if in_inc = 1;
				rename fam_incl9398&f_or_i = fam_incl9398 fam_incl9907&f_or_i = fam_incl9907 fam_incl9307&f_or_i = fam_incl9307;
run;

proc sort data = tempinc.incprocess3&f_or_i&y nodupkey;
	by id;
run;

* Add the year-specific head flags of 1993 and 1999 to every year;
data tempinc.incprocess4&f_or_i&y;
	merge tempinc.incprocess3&f_or_i&y(in = in_inc) tempinc.hhead&f_or_i.1993 tempinc.hhead&f_or_i.1999;
		by id;
		if in_inc = 1;
run;

* Adjust missing values. A household without a stable head gets fam_incl = 0. Inside an
  included household a person who is not the head gets head = 0. The head flag stays missing
  for persons in households that are not included;
data grund.incproc230423_&f_or_i&y;
	set tempinc.incprocess4&f_or_i&y;
		if fam_incl9398 =. then fam_incl9398 = 0;
		if fam_incl9907 =. then fam_incl9907 = 0;
		if fam_incl9307 =. then fam_incl9307 = 0;
		if head9398 =. and fam_incl9398 = 1 then head9398 = 0;
		if head9907 =. and fam_incl9907 = 1 then head9907 = 0;
		if head9307 =. and fam_incl9307 = 1 then head9307 = 0;
run;

proc sort data = grund.incproc230423_&f_or_i&y nodupkey;
	by id;
run;

%end;
%mend;
%m1ddee(f);


**********************************************************************************************
* STEP 4
* Output to csv format
**********************************************************************************************;

/*--------------------------------------------------------------------------------------------
  m1dd(f_or_i)

  Exports grund.incproc230423_<f_or_i><year> for 1993-2007 to one CSV file per year. An
  existing file is replaced. The file name contains the year but not the suffix.

  Parameter
    f_or_i   Suffix of the datasets to export (called with f below).
--------------------------------------------------------------------------------------------*/
%macro m1dd(f_or_i);
%do y = 1993 %to 2007;

	proc export
			data    = grund.incproc230423_&f_or_i&y
			outfile = "\\micro.intra\projekt\P0459$\P0459_gem\SAS DATA\Thomas\Projekt2\RFS\bal\incproc230423&y..csv"
			dbms    = csv
			replace;
	run;

%end;
%mend;
%m1dd(f);

